

clear all
set mem 5g 
set matsize 11000
set more off
cap log close 

gl raw_path = "/Users/Wei/Dropbox/Census"
cd "/Users/Wei/Dropbox/Twins/Restat-Twins/Data"
log using "Twins-data.log",replace 
use "$raw_path/1982dta", clear
de
gen prov = province 
egen hhid = group(province-hhcode)
keep hhid hhoftyp sex age releahh prov sex edulevel rural nationty chldborn chldlive
de

keep if hhoftyp == 1 
drop hhoftyp 
gen male = sex == 1 

drop if age > 120 
gen child = releahh == 3 
egen child_num = sum(child), by(hhid)

* Have child
drop if child_num == 0
gen boy = child& sex ==1 
gen girl = child & sex ==2 

* Mother == 1 
gen father = (releahh == 1 | releahh == 2) & sex == 1
gen mother = (releahh == 2 | releahh == 1 ) & sex == 2 
egen mother_num = sum(mother),  by(hhid)
keep if mother_num == 1 

* Mother birth age >= 15  
gen f_age = age if father 
gen m_age = age if mother 
egen h_f_age = max(f_age), by(hhid)
egen h_m_age = max(m_age), by(hhid)
gen c_age = age if child 
egen h_c_age = max(c_age), by(hhid)
drop if h_m_age - h_c_age <= 15
drop if h_m_age - h_c_age > 50 
gen m_birth_age = h_m_age - h_c_age
drop h_c_age f_age m_age 

* Children number 
gen n_chldborn = chldborn * mother 
egen h_chldborn = max(n_chldborn), by(hhid)
gen n_chldlive = chldlive * mother 
egen h_chldlive = max(n_ chldlive), by(hhid)
keep if h_chldborn == h_chldlive
keep if h_chldborn == child_num
drop n_chldborn h_chldborn n_chldlive  h_chldlive

* Children maximum age < 18 
gen child_age = age if child 
egen max_child_age = max(child_age), by(hhid)
drop if max_child_age >= 18
drop max_child_age 

* father education, mother education
gen f_edu = edulevel if father
gen m_edu = edulevel if mother 
egen h_f_edu = max(f_edu), by(hhid)
egen h_m_edu = max(m_edu), by(hhid)
drop f_edu m_edu
recode h_f_edu (6 = 1) (5 = 2) (4 = 3) (1/3 = 4)(. = 99)
recode h_m_edu (6 = 1) (5 = 2) (4 = 3) (1/3 = 4)(. = 99)

* Ethnicity 
gen f_han = nationty == 1 if father
gen m_han = nationty == 1 if mother 
egen h_f_han = max(f_han), by(hhid)
egen h_m_han = max(m_han), by(hhid)
gen h_fm_min = h_f_han == 0 | h_f_han == 0 
drop f_han m_han 


* keep child data 
keep if child 
keep hhid sex age edulevel h_* child_num m_birth_age rural prov
order hhid 
sort hhid age
cap drop twins 
gen twins = hhid == hhid[_n-1] & age == age[_n-1]
replace twins = hhid == hhid[_n+1] & age == age[_n+1] if twins == 0
recode edulevel (6 = 1) (5 = 2) (4 = 3) (1/3 = 4)(. = 99)

preserve 
keep hhid age 
duplicates drop 
gsort hhid -age 
bys hhid: gen order = _n 
save "order82",replace
restore
 
merge m:1 hhid age using   "order82"
drop _merge 
cap erase "order82.dta"
gen year = 1982 
count
save "twins1982",replace 


*******************************************

use "$raw_path/census90",clear
egen hhid = group(prov prefect county address house) 
de

keep hhid nation prov prefect county address house houstype usuresty usuresid age relation edulevel numbir* numsur* sex
de

gen urban_hukou = houstype == 2 
gen resid_type = usuresty if usuresid == 1 
egen resid_type_comm = mode(resid_type), by(prov prefect county address) minmode
drop resid_type 
ren resid_type_comm resid_type
drop prefect county address house
drop if mi(resid_type)

gen birth_year = int(age/100)
gen birth_month = age - int(age/100)*100
ren age birth_ym
gen age1 = 990 - birth_year 
gen age2 = 7 - birth_month
gen age = age1 if age2 > 0 
replace age = age1 - 1 if age2 <= 0 
drop age1 age2 
drop if age <0 | age > 120 

ren houstype hktype

* have children at home 
gen child = relation == 3 
egen child_num = sum(child), by(hhid)
tab child_num
drop if child_num == 0

gen boy = child & sex == 1 
gen girl = child & sex == 2
egen num_boy = sum(boy), by(hhid)
egen num_girl = sum(girl), by(hhid)

* at least mother information 
gen father = (relation == 1 | relation == 2) & sex == 1
gen mother = (relation == 2 | relation == 1 ) & sex == 2 
egen mother_num = sum(mother) ,by(hhid)
tab mother_num
drop if mother_num != 1 


* Mother age, child age, birth age 
gen f_age = age if father 
gen m_age = age if mother 
egen h_f_age = max(f_age), by(hhid)
egen h_m_age = max(m_age), by(hhid)
gen c_age = age if child 
egen h_c_age = max(c_age), by(hhid)
drop if h_m_age - h_c_age <= 15
drop if h_m_age - h_c_age > 50
gen m_birth_age = h_m_age - h_c_age
drop f_age m_age c_age

* born = survive (male, female)
gen n_male = numbirm if mother 
gen n_female = numbirf if mother 
gen n_male_s = numsurm if mother 
gen n_female_s = numsurf if mother 
egen h_n_male = max(n_male), by(hhid)
egen h_n_female = max(n_female), by(hhid)
egen h_n_male_s = max(n_male_s), by(hhid)
egen h_n_female_s = max(n_female_s), by(hhid)
drop if h_n_male != h_n_male_s
drop if h_n_female != h_n_female_s
drop if h_n_male != num_boy
drop if h_n_female != num_girl
drop n_male n_female n_male_s n_female_s

* child age <= 18 
gen child_age = age if child 
egen max_child_age = max(child_age), by(hhid)
drop if max_child_age >= 18
drop max_child_age 


* father education, mother education
recode edulevel (1 =1 )(2 = 2) (3 = 3) (4/10 = 4)
gen f_edu = edulevel if father
gen m_edu = edulevel if mother 
egen h_f_edu = max(f_edu), by(hhid)
egen h_m_edu = max(m_edu), by(hhid)

* father eth, mother eth
gen f_han = nation == 1 if father
gen m_han = nation == 1 if mother 
egen h_f_han = max(f_han), by(hhid)
egen h_m_han = max(m_han), by(hhid)
gen h_fm_min = h_f_han == 0 | h_f_han == 0 

* keep child data 
keep if child 
keep hhid prov sex age hktype birth_year birth_ym edulevel h_* child_num m_birth_age resid_type birth_month urban_hukou
sort hhid age 

gen twins = hhid == hhid[_n-1] & age == age[_n-1] & birth_year == birth_year[_n-1] & birth_month == birth_month[_n-1]
replace twins = hhid == hhid[_n+1] & age == age[_n+1] & birth_year == birth_year[_n+1] & birth_month == birth_month[_n+1] if twins == 0


preserve 
keep hhid birth_year birth_month age
duplicates drop 
gsort hhid birth_year birth_month -age
bys hhid: gen order = _n 
save "order90",replace
restore

merge m:1 hhid age birth_year birth_month using "order90"
drop _merge 
gen year = 1990 
count
save "twins1990",replace 
cap erase "order90.dta"

**************

use "$raw_path/census2000",clear 
de

ren h02 hubie 
ren r07 hktype 
ren r041 birth_year 
ren r042 birth_month 
ren r02 relation 
drop h0 hubie 
ren r03 sex 
ren r23 marr
ren r241 marryy1st
ren r242 marrym1st
ren r251 numbirm
ren r252 numbirf
ren r253 numsurm
ren r254 numsurf
ren r151 schooling
ren r05 nation
ren ra0 resid_type
gen prov = substr(id, 1,2)
egen hhid = group(id)
keep hhid  hktype birth_year birth_month sex relation marr marryy1st marrym1st numbir* numsur* schooling nation resid_type prov
de

gen age1 = 2000 - birth_year 
gen age = age1 if birth_month< 11 
replace age = age1 -1  if birth_month >= 11 
drop age1  

drop if age >= 120 
drop if hktype == 0

egen num_people= sum(1), by(hhid)
drop if num_people > 3000
drop num_people
* have children at home 
gen child = relation == 2 
egen child_num = sum(child), by(hhid)
drop if child_num == 0
gen boy = child & sex == 1 
gen girl = child & sex == 2
egen num_boy = sum(boy), by(hhid)
egen num_girl = sum(girl), by(hhid)


* at least mother information 
gen father = (relation == 1 | relation == 0) & sex == 1
gen mother = (relation == 0 | relation == 1 ) & sex == 2 
egen mother_num = sum(mother) ,by(hhid)
tab mother_num
drop if mother_num != 1 

* Mother first marriage 
gen m_first = marr == 2 & mother
egen h_m_first = max(m_first),by(hhid)
keep if h_m_first == 1 


gen m_marr_1 = marryy1st if mother 
egen h_year_mar1 = max(m_marr_1), by(hhid)
gen m_marr_1m = marrym1st if mother 
egen h_month_mar1 = max(m_marr_1m), by(hhid)


* Mother age, child age, birth age 
gen f_age = age if father 
gen m_age = age if mother 
egen h_f_age = max(f_age), by(hhid)
egen h_m_age = max(m_age), by(hhid)
gen c_age = age if child 
egen h_c_age = max(c_age), by(hhid)
drop if h_m_age - h_c_age <= 15
gen m_birth_age = h_m_age - h_c_age


* born = survive (male, female)

gen n_male = numbirm if mother 
gen n_female = numbirf if mother 
gen n_male_s = numsurm if mother 
gen n_female_s = numsurf if mother 
egen h_n_male = max(n_male), by(hhid)
egen h_n_female = max(n_female),by(hhid)
egen h_n_male_s = max(n_male_s),by(hhid)
egen h_n_female_s = max(n_female_s), by(hhid)
drop if h_n_male != h_n_male_s
drop if h_n_female != h_n_female_s
drop if h_n_male != num_boy
drop if h_n_female != num_girl
drop h_n_male h_n_male_s h_n_female h_n_female_s 

* child age < 18 
gen child_age = age if child 
egen max_child_age = max(child_age), by(hhid)
drop if max_child_age >= 18


* father education, mother education
recode schooling (0/2 = 1) (3 =2)(4=3)(5/10 = 4)
gen f_edu = schooling if father
gen m_edu = schooling if mother 
egen h_f_edu = max(f_edu), by(hhid)
egen h_m_edu = max(m_edu), by(hhid)

* father eth, mother eth
gen f_han = nation == 1 if father
gen m_han = nation == 1 if mother 
egen h_f_han = max(f_han), by(hhid)
egen h_m_han = max(m_han), by(hhid)
gen h_fm_min = h_f_han == 0 | h_f_han == 0 
gen urban_hukou = hktype == 2 

* keep child data 
keep if child 
keep hhid sex age hktype birth_year schooling h_year_mar1  h_month_mar1  h_* child_num m_birth_age prov resid_type birth_month urban_hukou

gsort hhid age -birth_year -birth_month
drop if birth_year == . 
drop if birth_month == . 
gen twins = hhid == hhid[_n-1] & age == age[_n-1] & birth_year == birth_year[_n-1] & birth_month == birth_month[_n-1] 
replace twins = hhid == hhid[_n+1] & age == age[_n+1] & birth_year == birth_year[_n+1] & birth_month == birth_month[_n+1] if twins == 0

preserve 
keep hhid birth_year birth_month age 
duplicates drop 
gsort hhid birth_year birth_month -age
bys hhid: gen order = _n
save "order00",replace
restore 

merge m:1 hhid age birth_year birth_month using "order00"
drop _merge 
gen year = 2000
count
save "twins2000",replace 
cap erase "order00.dta"


* 2005 Population Sample 

use "$raw_path/pdata1_num_ren",clear
de

keep region hhid hhtype hktype sex ethnic year_birth moth_birth relation maritus year_mar1 month_mar1 educ age n_*birth n_*child city_flag
de

keep if hhtype == 1 
drop hhtype 

gen urban_hukou = hktype == 2 
gen age1 = 2005 - year_birth 
gen age2 = 11- moth_birth 
ren age age_nbs 
gen age = age1 if age2 > 0
replace age = age1 -1 if age2 <= 0 
drop age_nbs 

drop if age >= 120 


* have children at home 
gen child = relation == 2 
egen child_num = sum(child), by(region hhid)
drop if child_num == 0
gen boy = child & sex == 1 
gen girl = child & sex == 2
egen num_boy = sum(boy), by(region hhid)
egen num_girl = sum(girl), by(region hhid)

* at least mother information 
gen father = (relation == 1 | relation == 0) & sex == 1
gen mother = (relation == 0 | relation == 1 ) & sex == 2 
egen mother_num = sum(mother), by(region hhid)
tab mother_num
drop if mother_num != 1 


* Mother first marriage 
gen m_first = maritus == 2 & mother
egen h_m_first = max(m_first),by(region hhid)
keep if h_m_first == 1 
drop h_m_first
gen m_marr_1 = year_mar1 if mother 
egen h_year_mar1 = max(m_marr_1), by(region hhid)
gen m_marr_1m = month_mar1 if mother 
egen h_month_mar1 = max(m_marr_1m), by(region hhid)

* Mother age, child age, birth age 
gen f_age = age if father 
gen m_age = age if mother 
egen h_f_age = max(f_age), by(region hhid)
egen h_m_age = max(m_age), by(region hhid)
gen c_age = age if child 
egen h_c_age = max(c_age), by(region hhid)
drop if h_m_age - h_c_age <= 15
drop if h_m_age - h_c_age > 50
gen m_birth_age = h_m_age - h_c_age

* born = survive (male, female)
gen n_male = n_mbirth if mother 
gen n_female = n_fbirth if mother 
gen n_male_s = n_mchild if mother 
gen n_female_s = n_fchild if mother 
egen h_n_male = max(n_male), by(region hhid)
egen h_n_female = max(n_female), by(region hhid)
egen h_n_male_s = max(n_male_s), by(region hhid)
egen h_n_female_s = max(n_female_s), by(region hhid)
drop if h_n_male != h_n_male_s
drop if h_n_female != h_n_female_s
drop if h_n_male != num_boy
drop if h_n_female != num_girl
drop h_n_male h_n_male_s h_n_female h_n_female_s 

* child age < 18 
gen child_age = age if child 
egen max_child_age = max(child_age), by(region hhid)
drop if max_child_age >= 18


* father education, mother education
recode educ (4/10=4), gen(schooling)
gen f_edu = schooling if father
gen m_edu = schooling if mother 
egen h_f_edu = max(f_edu), by(region hhid)
egen h_m_edu = max(m_edu), by(region hhid)

* father eth, mother eth
gen f_han = ethnic == 1 if father
gen m_han = ethnic == 1 if mother 
egen h_f_han = max(f_han), by(region hhid)
egen h_m_han = max(m_han), by(region hhid)
gen h_fm_min = h_f_han == 0 | h_f_han == 0 


* keep child data 
keep if child 
tostring region hhid, replace 
egen hhid1 = group(region hhid)
gen prov = substr(region, 1,2)
destring prov, replace 
drop region hhid 
ren hhid1 hhid 
ren city_flag resid_type
gen birth_year = year_birth
gen birth_month = moth_birth
keep hhid sex age hktype birth_year birth_month schooling h_year_mar1  h_month_mar1  h_* child_num m_birth_age prov resid_type urban_hukou

gsort hhid age -birth_year
gen twins = hhid == hhid[_n-1] & age == age[_n-1] & birth_year == birth_year[_n-1] & birth_month == birth_month[_n-1]
replace twins = hhid == hhid[_n+1] & age == age[_n+1] & birth_year == birth_year[_n+1]& birth_month == birth_month[_n+1] if twins == 0

preserve 
keep hhid age birth_year birth_month
duplicates drop 
gsort hhid birth_year birth_month -age
bys hhid: gen order = _n
save "order05",replace
restore 

merge m:1 hhid age birth_year birth_month using "order05"
drop _merge 
gen year = 2005
count
save "twins2005",replace 
cap erase "order05.dta"


use "twins1982", clear 
destring prov, replace 
append using  "twins1990"
tostring prov, replace 
append using  "twins2000",force 
tostring resid_type,replace 
destring prov, replace 
append using  "twins2005"
* education issue
replace edulevel = . if edulevel == 0 
replace edulevel = schooling if mi(edulevel)
ren edulevel educ 
drop schooling
replace twins = 0 if twins == . 
 
* province issue
replace birth_year = 1000 + birth_year if year == 1990
replace birth_year = 1982 - age if year == 1982
replace birth_month = birth_ym - int(birth_ym/100) * 100 if mi(birth_month)
destring prov, replace 
gen province = prov 

gen birthyear = birth_year - 1 
merge m:1 province birthyear  using "fines", keepusing(fine)
drop if _merge == 2 
drop _merge  
ren fine fine_1 

replace birthyear = birthyear - 1 

merge m:1 province birthyear  using "fines", keepusing(fine)
drop if _merge == 2 
drop _merge 
ren fine fine_2


replace birthyear = birthyear + 2 
merge m:1 province birthyear  using "fines", keepusing(fine)
drop if _merge == 2 
drop _merge 
drop birthyear 

* Combine the provinces 
drop if prov == 54 // Tibet 
replace prov = 44 if prov == 46 // Hainan 
replace prov = 51 if prov == 50  // Chongqing

* Resident area type 
destring resid_type, replace 
replace rural = resid_type == 3 if mi(rural)

drop birth_ym
drop h_c_age-h_m_first 
destring prov, replace 


compress 
gen male = sex == 1 
gen urban = 1 - rural
gen fineXhan = fine_1* (1-h_fm_min)
gen h_fm_han = 1 - h_fm_min

sort year hhid 

gen twinsXorder = twins * order 
replace twinsXorder = . if twinsXorder == 0 

gen birth1 = birth_year if order == 1 
gen birth2 = birth_year if order == 2
egen h_birth1 = max(birth1), by(hhid year) 
egen h_birth2 = max(birth2), by(hhid year)
gen time_y1 = h_birth1 - h_year_mar1 
gen time_y2 = h_birth2 - h_birth1 
drop birth1 birth2 h_birth1 h_birth2 

gen birth1 = birth_month if order == 1 
gen birth2 = birth_month if order == 2
egen h_birth1 = max(birth1), by(hhid year) 
egen h_birth2 = max(birth2), by(hhid year)
gen time_m1 = h_birth1 - h_month_mar1 
gen time_m2 = h_birth2 - h_birth1 
drop birth1 birth2 h_birth1 h_birth2 

gen time_1 = time_y1 * 12 + time_m1
gen time_2 = time_y2 * 12 + time_m2
count
save "raw_data",replace 
cap erase "twins1982.dta"
cap erase "twins1990.dta"
cap erase "twins2000.dta"
cap erase "twins2005.dta"


********** Table 1 & Table 2 ******

use "raw_data",clear
keep hhid sex prov age rural urban child_num m_birth_age h_m_edu h_fm_min h_fm_han twins order year birth_year birth_month h_year_mar1 h_month_mar1 *_1 urban_hukou educ fine*

replace sex =. if twins == 1 
drop educ 
duplicates drop
*br if (hhid == hhid[_n-1] & order == order[_n-1]) |(hhid == hhid[_n+1] & order == order[_n+1]) 
replace urban_hukou = 1 if (hhid == hhid[_n-1] & order == order[_n-1]) |(hhid == hhid[_n+1] & order == order[_n+1]) 
duplicates drop 
drop if (hhid == hhid[_n-1] & order == order[_n-1]) |(hhid == hhid[_n+1] & order == order[_n+1]) 

replace fine = 0 if mi(fine) & birth_year <= 1979
replace fine_1 = 0 if mi(fine_1) & birth_year <= 1980 
replace fine_2 = 0 if mi(fine_2) & birth_year <= 1981 

gen fine_e = 6*fine_1 + 6*fine_2 if year == 1982 
replace fine_e = 3*fine_1 + 9*fine_2 if birth_month == 1 
replace fine_e = 4*fine_1 + 8*fine_2 if birth_month == 2
replace fine_e = 5*fine_1 + 7*fine_2 if birth_month == 3 
replace fine_e = 6*fine_1 + 6*fine_2 if birth_month == 4
replace fine_e = 7*fine_1 + 5*fine_2 if birth_month == 5 
replace fine_e = 8*fine_1 + 4*fine_2 if birth_month == 6 
replace fine_e = 9*fine_1 + 3*fine_2 if birth_month == 7 
replace fine_e = 10*fine_1 + 2*fine_2 if birth_month == 8 
replace fine_e = 11*fine_1 + 1*fine_2 if birth_month == 9
replace fine_e = 12*fine_1  if birth_month == 10
replace fine_e = 1*fine+11*fine_1  if birth_month == 11
replace fine_e = 2*fine+10*fine_1  if birth_month == 12

replace fine_e = fine_e/12

gen male = sex == 1 if !mi(sex)

recode order (3/. = 3)

replace fine_1 = 0 if birth_year <= 1979
replace fine_2 = 0 if birth_year <= 1980

forvalues i=1(1)3{
gen order`i'Xfine = (order ==`i')*fine_1
gen order`i'Xfine_e = (order ==`i')*fine_e

}

recode child_num (3/. = 3)

gen fine_eXmaj = fine_e * h_fm_han
gen fine_eXmin = fine_e * (1 - h_fm_han)
gen policy_79 = birth_year >= 1980
gen policyXmaj = policy_79 * h_fm_han
gen policyXmin = policy_79 * (1-h_fm_han)

replace twins = twins*100 

gen weight = 1
replace weight = 1/0.2 if year == 2005

tab prov, gen(prov_)
forvalues i = 1(1)28 {
gen prov`i'Xyob = (prov_`i' == 1 ) * (birth_year-1964)
}
drop prov_*
drop prov1Xyob

gen cluster_id = prov  // province clusters
replace urban_hukou = 8 if mi(urban_hukou)

tab h_m_edu, gen(m_edu_)
egen cell = group(year birth_year)
count
save "twins_order_data_reg", replace 
 

************* Time Data **********
use "raw_data",clear
replace fine = 0 if mi(fine) & birth_year <= 1979 
replace fine_1 = 0 if mi(fine_1) & birth_year <= 1980 
replace fine_2 = 0 if mi(fine_2) & birth_year <= 1981 

gen fine_e = 6*fine_1 + 6*fine_2 if year == 1982 
replace fine_e = 3*fine_1 + 9*fine_2 if birth_month == 1 
replace fine_e = 4*fine_1 + 8*fine_2 if birth_month == 2
replace fine_e = 5*fine_1 + 7*fine_2 if birth_month == 3 
replace fine_e = 6*fine_1 + 6*fine_2 if birth_month == 4
replace fine_e = 7*fine_1 + 5*fine_2 if birth_month == 5 
replace fine_e = 8*fine_1 + 4*fine_2 if birth_month == 6 
replace fine_e = 9*fine_1 + 3*fine_2 if birth_month == 7 
replace fine_e = 10*fine_1 + 2*fine_2 if birth_month == 8 
replace fine_e = 11*fine_1 + 1*fine_2 if birth_month == 9
replace fine_e = 12*fine_1  if birth_month == 10
replace fine_e = 1*fine+11*fine_1  if birth_month == 11
replace fine_e = 2*fine+10*fine_1  if birth_month == 12

replace fine_e = fine_e/12
keep hhid prov year order time* fine* birth_year h_m_edu h_fm_min h_fm_han m_birth_age twins rural 
sort year hhid order 
drop if order == order[_n-1] & hhid == hhid[_n-1]
replace time_1 = . if time_1 <=0 | time_1 >= 100
replace time_2 = . if time_2 <=0 | time_2 >= 100
gen policy_79 = birth_year >= 1980
gen policyXmaj = policy_79 * h_fm_han
gen policyXmin = policy_79 * (1-h_fm_han)

gen weight = 1
replace weight = 1/0.2 if year == 2005
egen cell = group(year birth_year)


tab prov, gen(prov_)
forvalues i = 1(1)28 {
gen prov`i'Xyob = (prov_`i' == 1 ) * (birth_year-1964)
}
drop prov_*
drop prov1Xyob
gen cluster_id = prov // province clusters 
count
save "time_data_reg", replace 

***** Regressions ****
set more off 
use "twins_order_data_reg", clear
drop if mi(fine_e)
replace birth_month = 0 if mi(birth_month)

set more off 
gen fine_exhan = fine_e* h_fm_han
gen policyxhan = (birth_year >= 1980) * h_fm_han

/* Appendix Table 1 */
su twins rural h_fm_han age  m_birth_age i.order [aw = weight]
su twins rural h_fm_han age  m_birth_age i.order if h_fm_han == 1 [aw = weight] 
su twins rural h_fm_han age  m_birth_age i.order if h_fm_han == 0 [aw = weight] 

gl control_1 = "rural h_fm_han i.h_m_edu i.order i.m_birth_age i.birth_month i.prov i.cell prov*Xyob"

/* Table 1 */
reg twins fine_e $control_1 [aw = weight], cluster(cluster_id)
outreg2 using "table1_2.xls",replace keep(fine_e) dec(3)
reg twins fine_e $control_1 [aw = weight] if h_fm_han == 1, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(fine_e) append dec(3)
reg twins fine_e $control_1 [aw = weight] if h_fm_han == 0, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(fine_e) append dec(3)
reg twins fine_e fine_exhan $control_1 [aw = weight], cluster(cluster_id)
outreg2 using "table1_2.xls", keep(fine_e fine_exhan ) append  dec(3)
reg twins policyxhan $control_1 [aw = weight], cluster(cluster_id)
outreg2 using "table1_2.xls", keep(policyxhan) append dec(3)

/* Table 2 */ 
reg twins order*Xfine_e i.order $control_1 [aw = weight] if h_fm_han == 1, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(order1Xfine_e order2Xfine_e order3Xfine_e) append dec(3)
reg twins fine_e i.order $control_1 [aw = weight] if h_fm_han == 1 & rural == 0, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(fine_e) append dec(3)
reg twins order*Xfine_e i.order $control_1 [aw = weight] if h_fm_han == 1 & rural == 0, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(order1Xfine_e order2Xfine_e order3Xfine_e) append dec(3)
reg twins fine_e i.order $control_1 [aw = weight] if h_fm_han == 1 & rural == 1, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(fine_e) append dec(3)
reg twins order*Xfine_e i.order $control_1 [aw = weight] if h_fm_han == 1 & rural == 1, cluster(cluster_id)
outreg2 using "table1_2.xls", keep(order1Xfine_e order2Xfine_e order3Xfine_e) append dec(3)

**** Table 3 ****
use "time_data_reg", clear 
set more off 
keep if birth_year <= 2001 
gen m_initial_age = int(m_birth_age - time_2/12)
replace m_initial_age = m_birth_age - time_y2 if mi(m_initial_age)
replace m_initial_age = 10 if m_initial_age <= 10 
replace m_initial_age = 40 if m_initial_age >= 40 & !mi(m_initial_age)
gen fine_u = fine if order == 1 
egen fine_use = max(fine_u), by(hhid year)
drop fine_e 
gen fine_e = fine_use
keep if order == 2 
replace time_2 = time_2/12 
replace time_2 = time_y2 if mi(time_2)
replace time_2 = . if time_2 >= 10 
gen twinsXpolicy = twins*(birth_year >=1980)
gl control_2 = "rural i.h_m_edu i.h_fm_han i.m_initial_age i.prov prov*Xyob i.cell"

/* Table 3 */ 
reg time_2 twinsXpolicy twins $control_2 [aw = weight],  cluster(cluster_id)
outreg2 using "table3.xls", keep(twinsXpolicy twins) replace dec(3)

reg time_2 twinsXpolicy twins $control_2 [aw = weight] if h_fm_han == 1, cluster(cluster_id)
outreg2 using "table3.xls", keep(twinsXpolicy twins) append  dec(3)

reg time_2 twinsXpolicy twins $control_2 [aw = weight] if h_fm_han == 0, cluster(cluster_id)
outreg2 using "table3.xls", keep(twinsXpolicy twins) append  dec(3)


****** CHNS Analysis ****
use "m10pexam",clear
keep  height hhid line wave commid t1-t5 gender age 

drop if height <50 // drop the outliers with shorter than 50 cm 
drop if mi(height)

gen age2 = age^2/100
gen urban = t2 == 1 

drop if age > 18 // keep those aged below 18 
* Define Twins * 
sort hhid wave age
keep if ((abs(age - age[_n-1]) <= 0.02) & wave == wave[_n-1] & hhid == hhid[_n-1] & line != line[_n-1]) ///
 | ((abs(age - age[_n+1]) <= 0.02) & wave == wave[_n+1] & hhid == hhid[_n+1] & line != line[_n+1])
gen birth_year = int(wave - age) + 1 

gen height_mi = mi(height)
egen height_mi_g = max(height_mi), by(hhid wave)
drop if height_mi_g == 1
drop height_mi height_mi_g
egen count_twins = sum(1), by(hhid wave) 
drop if count_twins == 1 
drop count_twins 

duplicates drop 
* Calculate the height differences * 

egen height_max = max(height), by(hhid wave)
egen height_min = min(height), by(hhid wave)

gen height_male_i = height * (gender == 1)
gen height_female_i = height * (gender == 2)
egen height_male = max(height_male_i), by(hhid wave)
egen height_female = max(height_female_i), by(hhid wave)

* Calculate the type of the twins * 
egen gender_max = max(gender), by(hhid wave)
egen gender_min = min(gender), by(hhid wave)
gen type = gender_max if gender_max == gender_min 
replace type = 3 if gender_max != gender_min 

gen height_gap = height_max - height_min 

gen taller_boy =  height_male > height_female & type == 3
gen height_mean = (height_max +height_min)/2
gen height_gap_n = height_max - height_min if type == 1 | type == 2 
replace height_gap_n = height_male - height_female  if type == 3
gen gap_ratio = height_gap/height_mean * 100

keep height_* height_mean gap_ratio age type birth_year hhid line wave taller_boy urban 
duplicates drop 
* Combine the birth year groups 

gen age2 = age^2/100
gen age_int = int(age)
gen prov = int(hhid/1e7)

gen commid = int(hhid/1000)

ren prov province 
gen birthyear = birth_year - 1 
merge m:1 province birthyear using "fines.dta" 
drop if _merge == 2 
drop _merge 
replace fine = 0 if birth_year <= 1979 & fine == . 
gen identical = type == 1 | type == 2 

ren fine fine_1 
replace birthyear = birth_year - 1 
merge m:1 province birthyear using "fines.dta" 
drop if _merge == 2 
drop _merge 
replace fine = 0 if birth_year <= 1979 & fine == . 
ren fine fine_2
* Define the effective fine rate 
gen fine = 0.5*fine_1 + 0.5 * fine_2 
* Combine the birth year groups
recode birth_year (1979/1985= 1) (1986/1990 = 2)(1991/1995 = 3) (1996/2001 = 4) (2002/2006 = 5) (2007 / 2009 = 6), gen(birth_group)
* Provinces combination 
gen cluster_id = prov 
replace prov = 21 if prov == 23
replace prov = 42 if prov == 43 
replace prov = 45 if prov == 52
replace prov = 41 if prov == 37 

duplicates drop
sort hhid wave 
drop if hhid == hhid[_n-1] & wave == wave[_n-1] //drop duplicates households 

* Merge information from Mother 
preserve 
merge 1:m hhid line wave using "m10relatives"
keep if _merge == 3
sort hhid wave
keep if relation == 2
keep o_hhid o_line
ren o_hhid hhid
ren o_line line 
merge m:m hhid line using "m10pexam"
keep if _merge == 3
keep hhid wave age 
ren age mother_age
save "mother_age",replace 
restore 

sort hhid wave
merge 1:m hhid wave using  "mother_age"
drop if _merge == 2 
drop _merge 

sort hhid wave 

drop if gap_ratio >= 8 & identical // outliers due to measures, by gr box  
gen birth_age = mother_age - age 
gen birth_mi = mi(birth_age)

sort prov birth_year 
gen identXfine = identical * fine
gen diffXfine = (1-identical) * fine

egen birth_age_mean = mean(birth_age) 
replace birth_age = birth_age_mean if mi(birth_age) 
drop birth_age_mean

save "twins_chns_use",replace 

use "twins_chns_use", clear 
/* Appendix Table A2 */
su height_gap  height_mean gap_ratio 
su height_gap  height_mean gap_ratio if identical 
su height_gap  height_mean gap_ratio if !identical 

/* Table 4 */
gl control_fine_1 = "height_mean taller_boy urban age age2 birth_age i.birth_g i.prov i.wave"
* Panel A
reg height_gap fine identical $control_fine_1,  cluster(cluster_id)
outreg2 using "table4.xls",replace keep(fine) dec(2)
reg height_gap identXfine diffXfine identical $control_fine_1 ,  cluster(cluster_id)
outreg2 using "table4.xls",append keep(identXfine diffXfine) dec(2)
reg gap_ratio fine identical $control_fine_1,  cluster(cluster_id)
outreg2 using "table4.xls",append keep(fine) dec(2)
reg gap_ratio identXfine diffXfine identical $control_fine_1 ,  cluster(cluster_id)
outreg2 using "table4.xls",append keep(identXfine diffXfine)  dec(2)


* Panel B
sort hhid wave 
drop if hhid == hhid[_n-1] // keep the only wave
gl control_fine_2 = "height_mean taller_boy urban age age2 birth_age i.birth_g i.prov"
reg height_gap fine identical $control_fine_2, cluster(cluster_id)
outreg2 using "table4.xls",append keep(fine) dec(2)
reg height_gap identXfine diffXfine identical  $control_fine_2 ,  cluster(cluster_id)
outreg2 using "table4.xls",append keep(identXfine diffXfine) dec(2)

reg gap_ratio fine identical  $control_fine_2, cluster(cluster_id)
outreg2 using "table4.xls",append keep(fine) dec(2)
reg gap_ratio identXfine diffXfine identical $control_fine_2 ,  cluster(cluster_id)
outreg2 using "table4.xls",append keep(identXfine diffXfine) dec(2)

cap erase "mother_age.dta"
cap erase "twins_chns_use.dta"




*** Figure 1 and Figure 2 ***
use "twins_order_data_reg", clear
set scheme s2mono
* Figure 1
preserve 
collapse twins [aw = weight], by(birth_year)
tw scatter twins birth_year, xtit(Year of birth) xlabel(1965(5)2005) ytit("Birth rate (%)") ///
note("Data source is 1982, 1990, 2000 and 2005 Census.") ylabel(0.2(0.1)0.9) xline(1979, lp(dash) ) 
restore 

* Figure 2
preserve 
collapse twins [aw = weight], by(birth_year h_fm_han)
reshape wide twins, i(birth_year) j(h_fm_han)
cap drop twins1_l twins0_l
lowess twins1 birth_year if birth_year >= 1971 & birth_year <= 2003, gen(twins1_l) nograph
lowess twins0 birth_year if birth_year >= 1971 & birth_year <= 2003, gen(twins0_l) nograph

tw (scatter twins1 twins0 birth_year, xtit(Year of birth) xlabel(1965(5)2005) ytit("Birth rate (%)") ///
note("Data source is 1982, 1990, 2000 and 2005 Census.") ylabel(0.2(0.1)0.9) xline(1979, lp(dash) ) ///
 legend(label(1 Han) label(2 Minority) label(3 Han smooth) label(4 Minority smooth))) ///
(line twins1_l twins0_l birth_year)
restore 

* Appendix Figure A1 ***
use "fines",clear 
drop if province == 54
ren birthyear year 

tw line fine year, by(province)

** Appendix Table B1 *****

use "twins_order_data_reg", clear
cap drop fine_* 
gen province = prov 
forvalues i = 1(2)5{
gen birthyear = birth_year +`i'
merge m:1 province birthyear using "fines", keepusing(fine) 
ren fine fine_`i'
drop if _merge == 2
drop _merge 
drop birthyear
}
replace birth_month = 0 if mi(birth_month)
keep if birth_year > 1980

gl control_test = "rural h_fm_han i.order i.m_birth_age i.birth_month i.prov i.cell prov*Xyob"

replace twins = twins/100 
* Appendix Table B1
reg fine_1 twins $control_test [aw = weight], cluster(cluster_id)
outreg2 using "table_b1.xls", keep( twins) replace dec(3)
reg fine_3 twins $control_test [aw = weight], cluster(cluster_id)
outreg2 using "table_b1.xls", keep( twins) append dec(3)
reg fine_5 twins $control_test [aw = weight], cluster(cluster_id)
outreg2 using "table_b1.xls", keep( twins) append dec(3)
log close 
